/*
* Sun Public License Notice
*
* The contents of this file are subject to the Sun Public License
* Version 1.0 (the "License"). You may not use this file except in
* compliance with the License. A copy of the License is available at
* http://www.sun.com/
*
* The Original Code is Forte for Java, Community Edition. The Initial
* Developer of the Original Code is Sun Microsystems, Inc. Portions
* Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved.
*/
package org.netbeans.modules.web.core.syntax;
import org.netbeans.editor.Syntax;
/**
* Syntax class for JSP tags. It is not meant to be used by itself, but as one of syntaxes with
* MultiSyntax. Recognizes JSP tags, comments and directives. Does not recognize scriptlets,
* expressions and declarations, which should be recognized by the master syntax, as expressions
* can appear embedded in a JSP tag. Moreover, they all share Java syntax.
*
* @author Petr Jiricka
* @version 1.00
*/
public class JspTagSyntax extends Syntax {

    // ---------------------------------------------------------------------
    // Token names
    // ---------------------------------------------------------------------
    public static final String TN_JSP_TAG = "jsp-tag-directive";
    public static final String TN_JSP_SYMBOL = "jsp-symbol";
    public static final String TN_JSP_COMMENT = "jsp-comment";
    public static final String TN_JSP_ATTRIBUTE = "jsp-attribute-name";
    public static final String TN_JSP_ATTR_VALUE = "jsp-attribute-value";
    public static final String TN_JSP_SYMBOL2 = "jsp-scriptlet-delimiter";

    // ---------------------------------------------------------------------
    // Token IDs
    // ---------------------------------------------------------------------
    public static final int TEXT = 0; // plain text (anything after the JSP fragment has ended)
    public static final int ERROR = 1; // erroneous text
    public static final int JSP_TAG = 2; // tag or directive name; also whitespace inside a tag/directive
    public static final int JSP_SYMBOL = 3; // delimiters such as '<', '</', '>', '/>', '<%@', '%>' and '='
    public static final int JSP_COMMENT = 4; // JSP comment
    public static final int JSP_ATTRIBUTE = 5; // attribute name
    public static final int JSP_ATTR_VALUE = 6; // attribute value, e.g. "string" or 'string'
    public static final int JSP_SYMBOL2 = 7; // java code delimiters (<%, <%=, <%!, %>); never returned by parseToken() here

    // ---------------------------------------------------------------------
    // Internal states
    // ---------------------------------------------------------------------
    // general
    private static final int ISI_ERROR = 1; // when the fragment does not start with <
    private static final int ISA_LT = 2; // after '<' char
    // tags and directives
    private static final int ISI_TAG = 3; // inside JSP tag
    private static final int ISI_DIR = 4; // inside JSP directive
    private static final int ISP_TAG = 5; // after JSP tag
    private static final int ISP_DIR = 6; // after JSP directive
    private static final int ISI_TAG_I_WS = 7; // inside JSP tag after whitespace
    private static final int ISI_DIR_I_WS = 8; // inside JSP directive after whitespace
    private static final int ISI_ENDTAG = 9; // inside end JSP tag
    private static final int ISI_TAG_ATTR = 10; // inside tag attribute
    private static final int ISI_DIR_ATTR = 11; // inside directive attribute
    private static final int ISP_TAG_EQ = 12; // just after '=' in tag
    private static final int ISP_DIR_EQ = 13; // just after '=' in directive
    private static final int ISI_TAG_STRING = 14; // inside string (value - "") in tag
    private static final int ISI_DIR_STRING = 15; // inside string (value - "") in directive
    private static final int ISI_TAG_STRING_B = 16; // inside string (value - "") after backslash in tag
    private static final int ISI_DIR_STRING_B = 17; // inside string (value - "") after backslash in directive
    private static final int ISI_TAG_STRING2 = 18; // inside string (value - '') in tag
    private static final int ISI_DIR_STRING2 = 19; // inside string (value - '') in directive
    private static final int ISI_TAG_STRING2_B = 20; // inside string (value - '') after backslash in tag
    private static final int ISI_DIR_STRING2_B = 21; // inside string (value - '') after backslash in directive
    private static final int ISA_ENDSLASH = 22; // after ending '/' in JSP tag
    private static final int ISA_ENDPC = 23; // after ending '%' in JSP directive
    // comments (+directives)
    private static final int ISA_LT_PC = 24; // after '<%' (comment or directive)
    private static final int ISI_JSP_COMMENT = 25; // after <%-
    private static final int ISI_JSP_COMMENT_M = 26; // inside JSP comment after -
    private static final int ISI_JSP_COMMENT_MM = 27; // inside JSP comment after --
    private static final int ISI_JSP_COMMENT_MMP = 28; // inside JSP comment after --%
    // end state
    static final int ISA_END_JSP = 29; // JSP fragment has finished and control
                                       // should be returned to master syntax
    // more errors
    private static final int ISI_TAG_ERROR = 30; // error in tag, can be cleared by > or \n
    private static final int ISI_DIR_ERROR = 31; // error in directive, can be cleared by %> or \n
    private static final int ISI_DIR_ERROR_P = 32; // error in directive after %, can be cleared by > or \n
    // additional states which had to be added in the process of improving this class
    private static final int ISA_LT_PC_AT = 33; // after '<%@' (directive)

    /** Creates the syntax and registers JSP_SYMBOL2 as the highest token ID it declares. */
    public JspTagSyntax() {
        highestTokenID = JSP_SYMBOL2;
    }

    /**
     * Tells whether the character may be part of an identifier.
     *
     * @param ch character to test
     * @return result of {@link Character#isJavaIdentifierPart(char)} for the character
     */
    public boolean isIdentifierPart(char ch) {
        return Character.isJavaIdentifierPart(ch);
    }

    /** Returns true for an ASCII letter or '_' - the characters that may start a tag or attribute name. */
    private static boolean isNameStart(char ch) {
        return (ch >= 'A' && ch <= 'Z')
            || (ch >= 'a' && ch <= 'z')
            || (ch == '_');
    }

    /** Returns true for an ASCII letter, digit, '_' or ':' - the characters accepted inside a tag/directive name. */
    private static boolean isTagNamePart(char ch) {
        return isNameStart(ch)
            || (ch >= '0' && ch <= '9')
            || (ch == ':');
    }

    /** Returns true for an ASCII letter, digit, '_' or '-' - the characters accepted inside an attribute name. */
    private static boolean isAttrNamePart(char ch) {
        return isNameStart(ch)
            || (ch >= '0' && ch <= '9')
            || (ch == '-'); // '-' is allowed because of names like http-equiv
    }

    /**
     * Scans the buffer from the current <code>offset</code> up to <code>stopOffset</code>
     * and returns the ID of the next recognized token. The scanner is a state machine driven
     * by the <code>state</code> field; each case either advances over the current character
     * (falling through to the common <code>offset++</code> at the end of the loop) or returns
     * a token, leaving <code>offset</code> just past the last character of that token.
     * <p>
     * Newlines are always reported as a separate <code>EOL</code> token: when a newline is met
     * in the middle of a token, the pending token is returned first and the newline is handled
     * by the next call (recognized by <code>offset == tokenOffset</code>).
     *
     * @return one of the token IDs declared in this class, <code>EOL</code> for a newline,
     *         or <code>EOT</code> when the buffer is exhausted in a state for which no
     *         partial token is reported
     */
    protected int parseToken() {
        char actChar;

        while (offset < stopOffset) {
            actChar = buffer[offset];

            switch (state) {
            case INIT:
                switch (actChar) {
                case '\n':
                    offset++;
                    return EOL;
                case '<':
                    state = ISA_LT;
                    break;
                default:
                    state = ISI_ERROR;
                    break;
                }
                break;

            case ISA_LT:
                if (isNameStart(actChar)) { // possible tag beginning
                    // '<' is returned alone; the name char is rescanned in ISI_TAG
                    state = ISI_TAG;
                    return JSP_SYMBOL;
                }
                switch (actChar) {
                case '/':
                    offset++;
                    state = ISI_ENDTAG;
                    return JSP_SYMBOL;
                case '\n':
                    state = ISI_TAG_ERROR;
                    return JSP_SYMBOL;
                case '%':
                    state = ISA_LT_PC;
                    break;
                default:
                    state = ISI_TAG_ERROR;
                    break;
                }
                break;

            case ISI_TAG:
            case ISI_DIR:
                if (!isTagNamePart(actChar)) { // end of the tag/directive name
                    state = ((state == ISI_TAG) ? ISP_TAG : ISP_DIR);
                    return JSP_TAG;
                }
                break;

            case ISP_TAG:
            case ISP_DIR:
                if (isNameStart(actChar)) { // an attribute name starts
                    state = ((state == ISP_TAG) ? ISI_TAG_ATTR : ISI_DIR_ATTR);
                    break;
                }
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return string first
                        return JSP_TAG;
                    }
                case '>': // for tags
                    if (state == ISP_TAG) {
                        if (offset == tokenOffset) { // no char
                            offset++;
                            state = ISA_END_JSP;
                            return JSP_SYMBOL;
                        } else { // return string first
                            return JSP_TAG;
                        }
                    } else { // directive
                        // Deliberately not switched to ISI_DIR_ERROR, to minimize
                        // errors shown while a directive is being written.
                        break;
                    }
                case '/': // for tags
                    if (state == ISP_TAG) {
                        if (offset == tokenOffset) { // no char
                            state = ISA_ENDSLASH;
                            break;
                        } else { // return string first
                            return JSP_TAG;
                        }
                    } else { // directive
                        // Deliberately not switched to ISI_DIR_ERROR, to minimize
                        // errors shown while a directive is being written.
                        break;
                    }
                case '%': // for directives
                    if (state == ISP_DIR) {
                        if (offset == tokenOffset) { // no char
                            state = ISA_ENDPC;
                            break;
                        } else { // return string first
                            return JSP_TAG;
                        }
                    } else { // tag
                        state = ISI_TAG_ERROR;
                        break;
                    }
                case '=':
                    offset++;
                    state = ((state == ISP_TAG) ? ISP_TAG_EQ : ISP_DIR_EQ);
                    return JSP_SYMBOL;
                case ' ':
                case '\t':
                    state = ((state == ISP_TAG) ? ISI_TAG_I_WS : ISI_DIR_I_WS);
                    break;
                }
                break;

            case ISI_TAG_I_WS:
            case ISI_DIR_I_WS:
                switch (actChar) {
                case ' ':
                case '\t':
                    break;
                default:
                    state = ((state == ISI_TAG_I_WS) ? ISP_TAG : ISP_DIR);
                    return JSP_TAG; // currently color as text
                }
                break;

            case ISI_ENDTAG:
                if (!isTagNamePart(actChar)) { // end of the end-tag name
                    state = ISP_TAG;
                    return JSP_TAG;
                }
                break;

            case ISI_TAG_ATTR:
            case ISI_DIR_ATTR:
                if (!isAttrNamePart(actChar)) { // end of the attribute name
                    state = ((state == ISI_TAG_ATTR) ? ISP_TAG : ISP_DIR);
                    return JSP_ATTRIBUTE;
                }
                break;

            case ISP_TAG_EQ:
            case ISP_DIR_EQ:
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return string first
                        return JSP_ATTR_VALUE;
                    }
                case '"':
                    state = ((state == ISP_TAG_EQ) ? ISI_TAG_STRING : ISI_DIR_STRING);
                    break;
                case '\'':
                    state = ((state == ISP_TAG_EQ) ? ISI_TAG_STRING2 : ISI_DIR_STRING2);
                    break;
                case ' ':
                case '\t':
                    // don't change the state
                    break;
                default:
                    state = ((state == ISP_TAG_EQ) ? ISP_TAG : ISP_DIR);
                    return JSP_ATTR_VALUE;
                }
                break;

            case ISI_TAG_STRING:
            case ISI_DIR_STRING:
            case ISI_TAG_STRING2:
            case ISI_DIR_STRING2:
                // closing double quote of a "..." value
                if ((actChar == '"') && ((state == ISI_TAG_STRING) || (state == ISI_DIR_STRING))) {
                    offset++;
                    state = ((state == ISI_TAG_STRING) ? ISP_TAG : ISP_DIR);
                    return JSP_ATTR_VALUE;
                }
                // closing single quote of a '...' value
                if ((actChar == '\'') && ((state == ISI_TAG_STRING2) || (state == ISI_DIR_STRING2))) {
                    offset++;
                    state = ((state == ISI_TAG_STRING2) ? ISP_TAG : ISP_DIR);
                    return JSP_ATTR_VALUE;
                }
                switch (actChar) {
                case '\\':
                    switch (state) {
                    case ISI_TAG_STRING:
                        state = ISI_TAG_STRING_B;
                        break;
                    case ISI_DIR_STRING:
                        state = ISI_DIR_STRING_B;
                        break;
                    case ISI_TAG_STRING2:
                        state = ISI_TAG_STRING2_B;
                        break;
                    case ISI_DIR_STRING2:
                        state = ISI_DIR_STRING2_B;
                        break;
                    }
                    break;
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return string first
                        return JSP_ATTR_VALUE;
                    }
                }
                break;

            case ISI_TAG_STRING_B:
            case ISI_DIR_STRING_B:
            case ISI_TAG_STRING2_B:
            case ISI_DIR_STRING2_B:
                switch (actChar) {
                case '"':
                case '\'':
                case '\\':
                    // escaped character - consumed as part of the value
                    break;
                default:
                    // Not an escape: step back so that, after the common offset++
                    // below, this character is rescanned in the plain string state.
                    offset--;
                    break;
                }
                switch (state) {
                case ISI_TAG_STRING_B:
                    state = ISI_TAG_STRING;
                    break;
                case ISI_DIR_STRING_B:
                    state = ISI_DIR_STRING;
                    break;
                case ISI_TAG_STRING2_B:
                    state = ISI_TAG_STRING2;
                    break;
                case ISI_DIR_STRING2_B:
                    state = ISI_DIR_STRING2;
                    break;
                }
                break;

            case ISA_ENDSLASH:
                switch (actChar) {
                case '>':
                    offset++;
                    state = ISA_END_JSP;
                    return JSP_SYMBOL;
                case '\n':
                    state = ISI_TAG_ERROR;
                    return JSP_SYMBOL;
                default:
                    state = ISP_TAG;
                    return JSP_SYMBOL;
                }
                // every branch above returns

            case ISA_ENDPC:
                switch (actChar) {
                case '>':
                    offset++;
                    state = ISA_END_JSP;
                    return JSP_SYMBOL;
                case '\n':
                    state = ISI_DIR_ERROR;
                    return JSP_SYMBOL;
                default:
                    state = ISP_DIR;
                    return JSP_SYMBOL;
                }
                // every branch above returns

            case ISA_LT_PC:
                switch (actChar) {
                case '@':
                    offset++;
                    state = ISA_LT_PC_AT;
                    return JSP_SYMBOL;
                case '-':
                    state = ISI_JSP_COMMENT;
                    break;
                default: // just cut it, because this will be recognized
                         // by master syntax as a Java scriptlet/expression/declaration
                    state = ISA_END_JSP;
                    return JSP_SYMBOL;
                }
                break;

            // JSP comment states
            case ISI_JSP_COMMENT:
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return block comment first
                        return JSP_COMMENT;
                    }
                case '-':
                    state = ISI_JSP_COMMENT_M;
                    break;
                }
                break;

            case ISI_JSP_COMMENT_M:
                switch (actChar) {
                case '\n':
                    state = ISI_JSP_COMMENT;
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return block comment first
                        return JSP_COMMENT;
                    }
                case '-':
                    state = ISI_JSP_COMMENT_MM;
                    break;
                default:
                    state = ISI_JSP_COMMENT;
                    break;
                }
                break;

            case ISI_JSP_COMMENT_MM:
                switch (actChar) {
                case '\n':
                    state = ISI_JSP_COMMENT;
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return block comment first
                        return JSP_COMMENT;
                    }
                case '%':
                    state = ISI_JSP_COMMENT_MMP;
                    break;
                case '-':
                    // a longer run of dashes: the last two still count as "--"
                    state = ISI_JSP_COMMENT_MM;
                    break;
                default:
                    state = ISI_JSP_COMMENT;
                    break;
                }
                break;

            case ISI_JSP_COMMENT_MMP:
                switch (actChar) {
                case '\n':
                    state = ISI_JSP_COMMENT;
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else { // return block comment first
                        return JSP_COMMENT;
                    }
                case '>':
                    state = ISA_END_JSP;
                    offset++;
                    return JSP_COMMENT;
                case '-':
                    // "--%" was a false alarm, but this dash may itself begin the
                    // real terminator (e.g. "--%--%>"), so remember it instead of
                    // dropping back to the plain comment state.
                    state = ISI_JSP_COMMENT_M;
                    break;
                default:
                    state = ISI_JSP_COMMENT;
                    break;
                }
                break;

            case ISI_ERROR:
                switch (actChar) {
                case '\n':
                    state = INIT;
                    return ERROR;
                case '<':
                    state = ISA_LT;
                    return ERROR;
                }
                break;

            case ISI_TAG_ERROR:
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        state = ISI_TAG_I_WS;
                        return EOL;
                    } else { // return error first
                        return ERROR;
                    }
                case '>':
                    state = ISI_TAG_I_WS;
                    return ERROR;
                }
                break;

            case ISI_DIR_ERROR:
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        state = ISI_DIR_I_WS;
                        return EOL;
                    } else { // return error first
                        return ERROR;
                    }
                case '%':
                    state = ISI_DIR_ERROR_P;
                    break;
                }
                break;

            case ISI_DIR_ERROR_P:
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        state = ISI_DIR_I_WS;
                        return EOL;
                    } else { // return error first
                        return ERROR;
                    }
                case '>':
                    // step back before the '%' so that "%>" is not part of the error token
                    offset--;
                    state = ISI_DIR_I_WS;
                    return ERROR;
                }
                break;

            case ISA_END_JSP:
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else {
                        return TEXT;
                    }
                }
                break;

            // added states
            case ISA_LT_PC_AT:
                if (isNameStart(actChar)) { // the directive starts
                    state = ISI_DIR;
                    return JSP_TAG;
                }
                switch (actChar) {
                case '\n':
                    if (offset == tokenOffset) { // no char
                        offset++;
                        return EOL;
                    } else {
                        return JSP_TAG;
                    }
                }
                break;
            }

            offset++;
        } // end of while(offset...)

        // At this stage there's no more text in the scanned buffer. Whether or
        // not this is the last buffer, the partial token implied by the current
        // state is reported in the same way.
        switch (state) {
        case ISI_ERROR:
        case ISI_TAG_ERROR:
        case ISI_DIR_ERROR:
        case ISI_DIR_ERROR_P:
            return ERROR;
        case ISA_LT:
        case ISA_LT_PC:
        case ISA_ENDSLASH:
        case ISA_ENDPC:
        case ISP_TAG_EQ:
        case ISP_DIR_EQ:
            return JSP_SYMBOL;
        case ISI_TAG:
        case ISI_DIR:
        case ISI_ENDTAG:
        case ISP_TAG:
        case ISP_DIR:
        case ISI_TAG_I_WS:
        case ISI_DIR_I_WS:
        case ISA_LT_PC_AT:
            return JSP_TAG;
        case ISI_TAG_ATTR:
        case ISI_DIR_ATTR:
            return JSP_ATTRIBUTE;
        case ISI_TAG_STRING:
        case ISI_DIR_STRING:
        case ISI_TAG_STRING_B:
        case ISI_DIR_STRING_B:
        case ISI_TAG_STRING2:
        case ISI_DIR_STRING2:
        case ISI_TAG_STRING2_B:
        case ISI_DIR_STRING2_B:
            return JSP_ATTR_VALUE;
        case ISI_JSP_COMMENT:
        case ISI_JSP_COMMENT_M:
        case ISI_JSP_COMMENT_MM:
        case ISI_JSP_COMMENT_MMP:
            return JSP_COMMENT;
        case ISA_END_JSP:
            return TEXT;
        }

        // The state is not one of those listed above. On the very last buffer
        // this is optionally traced for debugging (opt-in via a system property).
        if (lastBuffer && Boolean.getBoolean("netbeans.debug.exceptions")) {
            new Exception("Unhandled state : " + getStateName(state)).printStackTrace();
        }
        return EOT;
    }

    /**
     * Maps a token ID to its token name.
     *
     * @param tokenID token ID, normally one of the constants declared in this class
     * @return the matching <code>TN_*</code> name; for IDs not declared here the
     *         result of the superclass implementation
     */
    public String getTokenName(int tokenID) {
        switch (tokenID) {
        case TEXT:
            return TN_TEXT;
        case ERROR:
            return TN_ERROR;
        case JSP_TAG:
            return TN_JSP_TAG;
        case JSP_SYMBOL:
            return TN_JSP_SYMBOL;
        case JSP_COMMENT:
            return TN_JSP_COMMENT;
        case JSP_ATTRIBUTE:
            return TN_JSP_ATTRIBUTE;
        case JSP_ATTR_VALUE:
            return TN_JSP_ATTR_VALUE;
        case JSP_SYMBOL2:
            return TN_JSP_SYMBOL2;
        default:
            return super.getTokenName(tokenID);
        }
    }

    /**
     * Maps an internal scanner state to a readable name (used in the debug trace
     * of {@link #parseToken()}).
     *
     * @param stateNumber internal state number
     * @return the state constant's name prefixed with <code>jsptag_</code>; for
     *         states not declared here the result of the superclass implementation
     */
    public String getStateName(int stateNumber) {
        switch (stateNumber) {
        case ISI_ERROR:
            return "jsptag_ISI_ERROR";
        case ISA_LT:
            return "jsptag_ISA_LT";
        case ISI_TAG:
            return "jsptag_ISI_TAG";
        case ISI_DIR:
            return "jsptag_ISI_DIR";
        case ISP_TAG:
            return "jsptag_ISP_TAG";
        case ISP_DIR:
            return "jsptag_ISP_DIR";
        case ISI_TAG_I_WS:
            return "jsptag_ISI_TAG_I_WS";
        case ISI_DIR_I_WS:
            return "jsptag_ISI_DIR_I_WS";
        case ISI_ENDTAG:
            return "jsptag_ISI_ENDTAG";
        case ISI_TAG_ATTR:
            return "jsptag_ISI_TAG_ATTR";
        case ISI_DIR_ATTR:
            return "jsptag_ISI_DIR_ATTR";
        case ISP_TAG_EQ:
            return "jsptag_ISP_TAG_EQ";
        case ISP_DIR_EQ:
            return "jsptag_ISP_DIR_EQ";
        case ISI_TAG_STRING:
            return "jsptag_ISI_TAG_STRING";
        case ISI_DIR_STRING:
            return "jsptag_ISI_DIR_STRING";
        case ISI_TAG_STRING_B:
            return "jsptag_ISI_TAG_STRING_B";
        case ISI_DIR_STRING_B:
            return "jsptag_ISI_DIR_STRING_B";
        case ISI_TAG_STRING2:
            return "jsptag_ISI_TAG_STRING2";
        case ISI_DIR_STRING2:
            return "jsptag_ISI_DIR_STRING2";
        case ISI_TAG_STRING2_B:
            return "jsptag_ISI_TAG_STRING2_B";
        case ISI_DIR_STRING2_B:
            return "jsptag_ISI_DIR_STRING2_B";
        case ISA_ENDSLASH:
            return "jsptag_ISA_ENDSLASH";
        case ISA_ENDPC:
            return "jsptag_ISA_ENDPC";
        case ISA_LT_PC:
            return "jsptag_ISA_LT_PC";
        case ISI_JSP_COMMENT:
            return "jsptag_ISI_JSP_COMMENT";
        case ISI_JSP_COMMENT_M:
            return "jsptag_ISI_JSP_COMMENT_M";
        case ISI_JSP_COMMENT_MM:
            return "jsptag_ISI_JSP_COMMENT_MM";
        case ISI_JSP_COMMENT_MMP:
            return "jsptag_ISI_JSP_COMMENT_MMP";
        case ISA_END_JSP:
            return "jsptag_ISA_END_JSP";
        case ISI_TAG_ERROR:
            return "jsptag_ISI_TAG_ERROR";
        case ISI_DIR_ERROR:
            return "jsptag_ISI_DIR_ERROR";
        case ISI_DIR_ERROR_P:
            return "jsptag_ISI_DIR_ERROR_P";
        case ISA_LT_PC_AT:
            return "jsptag_ISA_LT_PC_AT";
        default:
            return super.getStateName(stateNumber);
        }
    }
}
/*
* Log
* 4 Gandalf-post-FCS1.2.2.0 4/5/00 Petr Jiricka Token names and examples
* from bundles.
* 3 Gandalf 1.2 2/14/00 Petr Jiricka Eased conditions for
* syntax of directives to prevent bogus red error text.
* 2 Gandalf 1.1 2/11/00 Petr Jiricka Numerous small fixes.
* 1 Gandalf 1.0 2/10/00 Petr Jiricka
* $
*/